# LinearRegression-model
# LinearRegression-estimated
# LinearRegression-error
# LinearRegression-OLS
# Load the car data set. Every later step works from myDataFrame or from the
# matrices (Y, X1, X) derived from it below.
myDataFrame <- read.csv("Data/regression_auto.csv")
head(myDataFrame)
## make mpg weight weight1 price foreign repairs length
## 1 AMC 22 2930 2.93 4099 0 3 186
## 2 AMC 17 3350 3.35 4749 0 3 173
## 3 AMC 22 2640 2.64 3799 0 3 168
## 4 Audi 17 2830 2.83 9690 1 5 189
## 5 Audi 23 2070 2.07 6295 1 3 174
## 6 BMW 25 2650 2.65 9735 1 4 177
# Stop early if the CSV lacks a column that the models below rely on, instead
# of failing later with a less obvious error.
stopifnot(
  all(c("mpg", "weight1", "price", "foreign") %in% names(myDataFrame))
)
# Define variables
# Columns are looked up in myDataFrame explicitly rather than through
# attach(): an attached data frame sits behind the global environment on the
# search path, so a workspace object that happens to be called `mpg`, `price`,
# ... would silently be used instead of the column.
# NOTE: bare column names are therefore no longer available; refer to
# columns as myDataFrame[["name"]] (or myDataFrame$name).
# The explicit `name = ` tags keep the matrix column names identical to what
# cbind(mpg), cbind(weight1, price, foreign) produced.
Y <- cbind(mpg = myDataFrame[["mpg"]])
X1 <- cbind(weight1 = myDataFrame[["weight1"]])
X <- cbind(
  weight1 = myDataFrame[["weight1"]],
  price = myDataFrame[["price"]],
  foreign = myDataFrame[["foreign"]]
)
# Descriptive statistics
# Summarise the response matrix (Y), the single-regressor matrix (X1) and the
# full regressor matrix (X); these are expected to be defined earlier in the
# script. Lines starting with "##" appear to be the recorded console output of
# the call directly above them.
summary(Y)
## mpg
## Min. :14.00
## 1st Qu.:17.25
## Median :21.00
## Mean :20.92
## 3rd Qu.:23.00
## Max. :35.00
summary(X1)
## weight1
## Min. :2.020
## 1st Qu.:2.643
## Median :3.200
## Mean :3.099
## 3rd Qu.:3.610
## Max. :4.330
# X repeats the weight1 column shown above and adds price and foreign.
summary(X)
## weight1 price foreign
## Min. :2.020 Min. : 3299 Min. :0.0000
## 1st Qu.:2.643 1st Qu.: 4466 1st Qu.:0.0000
## Median :3.200 Median : 5146 Median :0.0000
## Mean :3.099 Mean : 6652 Mean :0.2692
## 3rd Qu.:3.610 3rd Qu.: 8054 3rd Qu.:0.7500
## Max. :4.330 Max. :15906 Max. :1.0000
# Correlation among variables
# Correlation of the response column with each column of X (cor() uses the
# Pearson method unless told otherwise); the result has one row per column of
# Y and one column per column of X.
cor(Y, X)
## weight1 price foreign
## mpg -0.8081609 -0.4384618 0.4003376
# Scatter diagram of the response against the single regressor.
# Y and X1 are expected to exist in the workspace already.
plot(Y ~ X1, data = myDataFrame)
# Simple linear regression: least-squares fit of Y on X1
simpleLinearRegression <- lm(Y ~ X1)
summary(simpleLinearRegression)
##
## Call:
## lm(formula = Y ~ X1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4123 -1.6073 -0.1043 0.9261 8.1072
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.0665 2.6112 14.578 2.02e-13 ***
## X1 -5.5315 0.8229 -6.722 5.93e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.86 on 24 degrees of freedom
## Multiple R-squared: 0.6531, Adjusted R-squared: 0.6387
## F-statistic: 45.19 on 1 and 24 DF, p-value: 5.935e-07
# 95% confidence intervals for the intercept and slope
confint(simpleLinearRegression, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 32.677256 43.455664
## X1 -7.229797 -3.833196
# Analysis-of-variance table for the fitted model
anova(simpleLinearRegression)
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X1 1 369.57 369.57 45.189 5.935e-07 ***
## Residuals 24 196.28 8.18
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Draw the fitted line onto the scatter diagram opened at the top of this
# section; this has to happen before the next plot() call starts a new figure.
abline(simpleLinearRegression)
# Fitted (predicted) values of the dependent variable
predValuesDepVarY <- fitted.values(simpleLinearRegression)
summary(predValuesDepVarY)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.12 18.10 20.37 20.92 23.45 26.89
plot(predValuesDepVarY ~ X1)
# Residuals of the fit, summarised and plotted against the regressor
regressionResiduals <- residuals(simpleLinearRegression)
summary(regressionResiduals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -5.4120 -1.6070 -0.1043 0.0000 0.9261 8.1070
plot(regressionResiduals ~ X1)
# Multiple linear regression: least-squares fit of Y on all columns of the
# regressor matrix X (both expected to exist in the workspace already).
# Because X enters the formula as one matrix term, coefficient names carry an
# "X" prefix and anova() reports a single 3-df row for X.
multipleLinearRegression <- lm(Y ~ X)
summary(multipleLinearRegression)
##
## Call:
## lm(formula = Y ~ X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6942 -1.1857 -0.0452 0.6433 8.6895
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.1661962 4.2647533 9.887 1.48e-09 ***
## Xweight1 -7.1211114 1.6046735 -4.438 0.000207 ***
## Xprice 0.0002258 0.0002654 0.851 0.404002
## Xforeign -2.5071265 2.0565685 -1.219 0.235723
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.89 on 22 degrees of freedom
## Multiple R-squared: 0.6752, Adjusted R-squared: 0.6309
## F-statistic: 15.25 on 3 and 22 DF, p-value: 1.374e-05
# 95% confidence intervals for every coefficient
confint(multipleLinearRegression, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 3.332164e+01 51.0107531780
## Xweight1 -1.044900e+01 -3.7932221856
## Xprice -3.245229e-04 0.0007760878
## Xforeign -6.772188e+00 1.7579354345
# Analysis-of-variance table for the fitted model
anova(multipleLinearRegression)
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X 3 382.08 127.360 15.247 1.374e-05 ***
## Residuals 22 183.77 8.353
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fitted (predicted) values of the dependent variable
predValuesDepVarMultiY <- fitted.values(multipleLinearRegression)
summary(predValuesDepVarMultiY)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.90 17.91 20.46 20.92 23.99 27.89
# Residuals of the fit
regressionResidualsMulti <- residuals(multipleLinearRegression)
summary(regressionResidualsMulti)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.69400 -1.18600 -0.04524 0.00000 0.64330 8.68900